import pandas as pd
# Read the first worksheet of the review workbook into a DataFrame and show
# its first five rows as a quick sanity check of the loaded columns.
data = pd.read_excel(io='用户评价.xlsx', sheet_name=0)
preview = data.head(5)
print(preview)

# 分词
import jieba
# Segmentation demo on a single cell: row index 3, column index 0
# (presumably one review text -- confirm against the workbook layout).
sample_text = data.iat[3, 0]
words = jieba.cut(sample_text)  # lazy generator of tokens
result = '/'.join(words)        # tokens separated by '/' for easy reading
print(result)

# Collect the text of every review whose 评价类型 is "好评".
# Missing cells are dropped and the remaining values coerced to str so that a
# blank or numeric cell cannot make the join / segmentation below raise.
good_reviews = (
    data.query('评价类型 == "好评"')['评价内容']
    .dropna()
    .astype(str)
    .tolist()
)

# Kept for backward compatibility: all positive reviews as one string.
good = ''.join(good_reviews)

# Segment review by review instead of segmenting the concatenated string:
# with no separator between reviews, the last characters of one review and
# the first characters of the next can be merged into a word that no user
# actually wrote, which would distort the frequency counts.
# The result is still a lazy, single-pass iterable of tokens.
good_seg_list = (
    word
    for review in good_reviews
    for word in jieba.cut(review)
)

# 过滤停用词
# Base stopword list: one entry per line of stopwords.txt (UTF-8).
with open('stopwords.txt', 'r', encoding='utf-8') as stopword_file:
    stopwords = stopword_file.read().splitlines()

# Extra stopwords added on top of the file's entries for this analysis.
extra_stopwords = [
    ' ', '宝贝', '三脚架', '架子', '支架', '上午',
    '下午', '挺', '不错', '买', '选购', '朋友',
]
stopwords.extend(extra_stopwords)

# Build the lookup set once: membership in a set is O(1), whereas testing
# against the list costs O(len(stopwords)) for every single token.
# Entries are lower-cased so the comparison below is case-insensitive.
stopword_set = {entry.lower() for entry in stopwords}

# Normalise each token to lower case *before* the stopword check, so that a
# token such as "The" is filtered by a stopword "the" instead of slipping
# through and then being counted as "the".  Whitespace-only tokens (newlines,
# tabs, full-width spaces) are dropped as well; the stopword list only covers
# the plain space character.
good_filtered = [
    token
    for token in (w.lower() for w in good_seg_list)
    if token.strip() and token not in stopword_set
]

# 统计词频
from collections import Counter
# Tally the filtered tokens and keep the 50 most frequent ones as
# (word, count) pairs, most frequent first.
word_counts = Counter(good_filtered)
good_frq = word_counts.most_common(50)
print(good_frq)

# 绘制词云图
import pyecharts.options as opts
from pyecharts.charts import WordCloud
# Option objects are built up front so the chart construction stays compact.
title_style = opts.TextStyleOpts(font_size=30)
title = opts.TitleOpts(title='用户偏好分析', title_textstyle_opts=title_style)
tooltip = opts.TooltipOpts(is_show=True)

# Word cloud of the (word, count) pairs; font sizes range from 10 to 80.
chart = (
    WordCloud()
    .add(series_name='数量', data_pair=good_frq, word_size_range=[10, 80])
    .set_global_opts(title_opts=title, tooltip_opts=tooltip)
)

# Write the chart out as an HTML page.
chart.render('词云图.html')
